This notebook compares doublet results calculated on benchmarking datasets to one another, with the primary goal of addressing these questions:
There are several items to bear in mind when interpreting these results:
cxds, as we are using it, does not have a specific
threshold for calling droplets. By contrast, both scrublet
and scDblFinder identify a threshold based on the given
dataset they are processing. This notebook uses a doublet threshold of
>=0.5 for cxds, which may not be
universally suitable (though choosing a universally suitable threshold
is not easy either!).suppressPackageStartupMessages({
library(SingleCellExperiment)
library(ggplot2)
library(patchwork)
library(caret)
library(UpSetR)
})
# Use a consistent black-and-white ggplot2 theme for all plots in this notebook
theme_set(theme_bw())
# define threshold used to call cxds
# scores >= this value are labeled "doublet"; see the notebook text for caveats
# about choosing a universal threshold for cxds
cxds_threshold <- 0.5
# Locate the module root via the renv project marker, then build input/output paths
module_base <- rprojroot::find_root(rprojroot::is_renv_project)
# directory holding the benchmark SingleCellExperiment inputs
data_dir <- file.path(module_base, "scratch", "benchmark-datasets")
# directory holding the per-method doublet-calling result TSVs
result_dir <- file.path(module_base, "results", "benchmark-results")
plot_pca_calls <- function(df,
                           color_column,
                           plot_type = "calls",
                           title) {
  # Plot PC1 vs PC2 from `df`, colored by `color_column`.
  #
  # Arguments:
  #   df           - data frame containing columns PC1, PC2, and `color_column`
  #   color_column - unquoted column name (tidy evaluation; do NOT pass a string)
  #                  holding the call labels to color by
  #   plot_type    - "calls": two-level coloring (singlet/doublet), with doublets
  #                  re-drawn on top so they are not hidden beneath the far more
  #                  numerous singlets
  #                - "class": three-level coloring (singlet/doublet/ambiguous),
  #                  with non-singlet droplets re-drawn on top
  #   title        - plot title
  #
  # Returns a ggplot object. Stops with an error for any other `plot_type`.
  plot <- ggplot(df) +
    aes(
      x = PC1,
      y = PC2,
      color = {{ color_column }}
    ) +
    geom_point(
      size = 0.75,
      alpha = 0.6
    ) +
    ggtitle(title) +
    guides(color = guide_legend(override.aes = list(size = 2)))
  if (plot_type == "calls") {
    plot <- plot +
      scale_color_manual(
        name = "",
        values = c("doublet" = "black", "singlet" = "gray90")
      ) +
      # overlay the doublets so they remain visible on top of the singlets
      geom_point(
        data = dplyr::filter(df, {{ color_column }} == "doublet"),
        color = "black",
        size = 0.75
      )
  } else if (plot_type == "class") {
    plot <- plot +
      scale_color_manual(
        name = "Consensus",
        values = c(
          "ambiguous" = "yellow",
          "doublet" = "black",
          "singlet" = "gray90"
        )
      ) +
      # overlay non-singlet droplets on top; use {{ color_column }} rather than
      # a hard-coded column name so the function honors its documented interface
      # regardless of which column the caller supplies
      geom_point(
        data = dplyr::filter(df, {{ color_column }} != "singlet"),
        size = 0.75,
        alpha = 0.7,
        aes(color = {{ color_column }})
      )
  } else {
    stop("plot_type must be 'calls' or 'class'")
  }
  return(plot)
}
plot_pca_metrics <- function(df, color_column) {
  # Plot PC1 vs PC2 colored by confusion-matrix call type (tp/tn/fp/fn),
  # re-drawing the false calls (fp, fn) on top so they stand out against
  # the more numerous correct calls.
  # `color_column` is an unquoted column name in `df` (tidy evaluation).
  # Returns a ggplot object.

  # colors per confusion-matrix category; false calls get the stronger colors
  metric_colors <- c(
    "tp" = "lightblue",
    "tn" = "pink",
    "fp" = "blue",
    "fn" = "firebrick2"
  )

  # subset of droplets whose consensus call was wrong, drawn as a second layer
  false_calls <- dplyr::filter(df, {{ color_column }} %in% c("fp", "fn"))

  metric_plot <- ggplot(df) +
    aes(
      x = PC1,
      y = PC2,
      color = {{ color_column }}
    ) +
    geom_point(size = 0.75, alpha = 0.6) +
    geom_point(data = false_calls, size = 0.75) +
    scale_color_manual(name = "Call type", values = metric_colors) +
    ggtitle("Consensus class metrics") +
    guides(color = guide_legend(override.aes = list(size = 2)))

  metric_plot
}
First, we’ll read in and combine doublet results, as well as PCA coordinates, into a list of data frames for each dataset.
# find all dataset names to process:
# dataset names are inferred from the scrublet result files present in result_dir
dataset_names <- list.files(result_dir, pattern = "*_scrublet.tsv") |>
stringr::str_remove("_scrublet.tsv")
# Read in data for analysis
# For each dataset, join ground truth + PCA coordinates (from the SCE object)
# with the scrublet and scDblFinder/cxds results into one per-droplet data frame
doublet_df_list <- dataset_names |>
purrr::map(
\(dataset) {
# paths to the per-method result TSVs and the SingleCellExperiment object
scdbl_tsv <- file.path(result_dir, glue::glue("{dataset}_scdblfinder.tsv"))
scrub_tsv <- file.path(result_dir, glue::glue("{dataset}_scrublet.tsv"))
sce_file <- file.path(data_dir, dataset, glue::glue("{dataset}_sce.rds"))
# scDblFinder results also carry the cxds score in the same TSV
scdbl_df <- scdbl_tsv |>
readr::read_tsv(show_col_types = FALSE) |>
dplyr::select(
barcodes,
cxds_score,
scdbl_score = score,
scdbl_prediction = class
) |>
# add cxds calls at `cxds_threshold` threshold
dplyr::mutate(
cxds_prediction = dplyr::if_else(
cxds_score >= cxds_threshold,
"doublet",
"singlet"
)
)
scrub_df <- readr::read_tsv(scrub_tsv, show_col_types = FALSE)
# grab ground truth and PCA coordinates
sce <- readr::read_rds(sce_file)
# makePerCellDF pulls per-cell metadata plus the named reduced dimension (PCA)
scuttle::makePerCellDF(sce, use.dimred = "PCA") |>
tibble::rownames_to_column(var = "barcodes") |>
dplyr::select(barcodes,
ground_truth = ground_truth_doublets,
PC1 = PCA.1,
PC2 = PCA.2) |>
# join on barcodes so every droplet carries all methods' scores and calls
dplyr::left_join(
scrub_df,
by = "barcodes"
) |>
dplyr::left_join(
scdbl_df,
by = "barcodes"
)
}
) |>
purrr::set_names(dataset_names)
This section contains upset plots that show overlap across doublet calls from each method, displayed for each dataset.
# For each dataset, draw an UpSet plot showing the overlap in doublet calls
# across the three methods
doublet_df_list |>
purrr::iwalk(
\(df, dataset) {
# barcodes each method called "doublet", named by method for the plot sets
doublet_barcodes <- list(
"scDblFinder" = df$barcodes[df$scdbl_prediction == "doublet"],
"scrublet" = df$barcodes[df$scrublet_prediction == "doublet"],
"cxds" = df$barcodes[df$cxds_prediction == "doublet"]
)
# upset() must be explicitly print()-ed inside iwalk for the plot to render
UpSetR::upset(fromList(doublet_barcodes), order.by = "freq") |> print()
grid::grid.text( # plot title
dataset,
x = 0.65,
y = 0.95,
gp = grid::gpar(fontsize=16)
)
}
)
Next, we will explore the consensus of doublet predictions across methods. To this end, we’ll create some new columns for each dataset:
consensus_class, which will be “doublet” if all three
methods are doublet, “singlet” if all three methods are singlet, and
“ambiguous” if there are any disagreements
consensus_call, which will be “doublet” if all
methods predict “doublet,” and “singlet” otherwise
confusion_call, which will classify the consensus call
as one of “tp”, “tn”, “fp”, or “fn” (true/false positive/negative) based
on consensus_calldoublet_df_list <- doublet_df_list |>
purrr::map(
\(dataset_df) {
dataset_df |>
# rowwise so that all() and && below evaluate on scalar, per-droplet values
# NOTE(review): the result retains rowwise grouping (no ungroup() here);
# confirm downstream code tolerates rowwise data frames — vectorized
# operations on them can be slow
dplyr::rowwise() |>
dplyr::mutate(
# Add column `consensus_call`
# "doublet" if all three methods are doublet, and "singlet" otherwise
consensus_call = dplyr::if_else(
all(c(scdbl_prediction, scrublet_prediction, cxds_prediction) == "doublet"),
"doublet",
"singlet"
),
# Add column `consensus_class`
# "doublet" if all three methods are doublet, "singlet" if all three methods are singlet,
# and "ambiguous" if there are any disagreements
consensus_class = dplyr::case_when(
consensus_call == "doublet" ~ "doublet",
all(c(scdbl_prediction, scrublet_prediction, cxds_prediction) == "singlet") ~ "singlet",
.default = "ambiguous"
),
# Add column `confusion_call`
# tp/tn/fp/fn based on the `consensus_call` column
# (scalar && is safe here only because of rowwise evaluation above)
confusion_call = dplyr::case_when(
consensus_call == "doublet" && ground_truth == "doublet" ~ "tp",
consensus_call == "singlet" && ground_truth == "singlet" ~ "tn",
consensus_call == "doublet" && ground_truth == "singlet" ~ "fp",
consensus_call == "singlet" && ground_truth == "doublet" ~ "fn"
)
)
}
)
This section plots the PCA for each dataset, clockwise from the top left:
scDblFinder singlet/doublet callsscrublet singlet/doublet callscxds singlet/doublet callspc_color_columns <- c("scDblFinder prediction" = "scdbl_prediction",
"scrublet prediction" = "scrublet_prediction",
"cxds prediction" = "cxds_prediction",
"Ground truth" = "ground_truth",
"Consensus call" = "consensus_call")
# For each dataset, build a grid of PCA plots: one panel per entry in
# `pc_color_columns` (names are plot titles, values are data frame column
# names), plus a consensus-class panel and a confusion-metrics panel
doublet_df_list |>
purrr::imap(
\(df, dataset) {
pc_plot_list <- pc_color_columns |>
purrr::imap(
\(color_column, plot_title) {
plot_pca_calls(
df,
# convert the string column name to a symbol for tidy evaluation
color_column = !!sym(color_column),
plot_type = "calls",
title = plot_title
# suppress per-panel legends; a shared legend is collected below
) + theme(legend.position = "none")}
)
# plot for consensus_class, which needs three colors
# note that the list name isn't actually used here
pc_plot_list$consensus_class <- plot_pca_calls(
df,
color_column = consensus_class,
plot_type = "class",
title = "Consensus class"
)
# plot metrics
pc_plot_list$metrics <- plot_pca_metrics(
df,
color_column = confusion_call
)
# assemble panels with a dataset-level title and a single collected legend
patchwork::wrap_plots(pc_plot_list) +
plot_annotation(
glue::glue("PCA for {dataset}"),
theme = theme(plot.title = element_text(size = 16))) +
plot_layout(guides = "collect")
}
)
$`hm-6k`
$`HMEC-orig-MULTI`
$`pbmc-1B-dm`
$`pdx-MULTI`
This section shows a table of the consensus class counts and calculates a confusion matrix with associated statistics from the consensus calls:
# For each dataset: print consensus class counts and a full confusion matrix,
# then collect Kappa and balanced accuracy into a summary table (metric_df)
metric_df <- doublet_df_list |>
purrr::imap(
\(df, dataset) {
print(glue::glue("======================== {dataset} ========================"))
cat("Table of consensus class counts:")
print(table(df$consensus_class))
cat("\n\n")
confusion_result <- caret::confusionMatrix(
# truth should be first
table(
"Truth" = df$ground_truth,
"Consensus prediction" = df$consensus_call
),
positive = "doublet"
)
print(confusion_result)
# Extract information we want to present later in a table
tibble::tibble(
"Dataset name" = dataset,
"Kappa" = round(confusion_result$overall["Kappa"], 3),
"Balanced accuracy" = round(confusion_result$byClass["Balanced Accuracy"], 3)
)
}
) |>
dplyr::bind_rows()
======================== hm-6k ========================
Table of consensus class counts:
ambiguous doublet singlet
283 138 6385
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 138 33
singlet 0 6635
Accuracy : 0.9952
95% CI : (0.9932, 0.9967)
No Information Rate : 0.9797
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.8908
Mcnemar's Test P-Value : 2.54e-08
Sensitivity : 1.00000
Specificity : 0.99505
Pos Pred Value : 0.80702
Neg Pred Value : 1.00000
Prevalence : 0.02028
Detection Rate : 0.02028
Detection Prevalence : 0.02512
Balanced Accuracy : 0.99753
'Positive' Class : doublet
======================== HMEC-orig-MULTI ========================
Table of consensus class counts:
ambiguous doublet singlet
3206 641 22579
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 506 3062
singlet 135 22723
Accuracy : 0.879
95% CI : (0.875, 0.8829)
No Information Rate : 0.9757
P-Value [Acc > NIR] : 1
Kappa : 0.2079
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.78939
Specificity : 0.88125
Pos Pred Value : 0.14182
Neg Pred Value : 0.99409
Prevalence : 0.02426
Detection Rate : 0.01915
Detection Prevalence : 0.13502
Balanced Accuracy : 0.83532
'Positive' Class : doublet
======================== pbmc-1B-dm ========================
Table of consensus class counts:
ambiguous doublet singlet
487 38 3265
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 26 104
singlet 12 3648
Accuracy : 0.9694
95% CI : (0.9634, 0.9746)
No Information Rate : 0.99
P-Value [Acc > NIR] : 1
Kappa : 0.2986
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.68421
Specificity : 0.97228
Pos Pred Value : 0.20000
Neg Pred Value : 0.99672
Prevalence : 0.01003
Detection Rate : 0.00686
Detection Prevalence : 0.03430
Balanced Accuracy : 0.82825
'Positive' Class : doublet
======================== pdx-MULTI ========================
Table of consensus class counts:
ambiguous doublet singlet
2945 4 7347
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 3 1314
singlet 1 8978
Accuracy : 0.8723
95% CI : (0.8657, 0.8787)
No Information Rate : 0.9996
P-Value [Acc > NIR] : 1
Kappa : 0.0038
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.7500000
Specificity : 0.8723280
Pos Pred Value : 0.0022779
Neg Pred Value : 0.9998886
Prevalence : 0.0003885
Detection Rate : 0.0002914
Detection Prevalence : 0.1279138
Balanced Accuracy : 0.8111640
'Positive' Class : doublet
Overall, methods do not have substantial overlap with each other. They each tend to detect different sets of doublets, leading to fairly small sets of consensus doublets. Further, the consensus doublets called by all three methods have some, but not substantial, overlap with the ground truth.
For three out of four datasets, scDblFinder predicts a
much larger number of doublets compared to other methods.
Based on the PCAs, it additionally looks like there are many more
false negatives in the consensus prediction that cxds
appears to capture but other methods do not.
The table below summarizes performance of the “consensus caller”.
Note that, in the benchmarking paper
these datasets were originally analyzed in, hm-6k was
observed to be one of the “easiest” datasets to classify across
methods.
Consistent with that observation, it has the highest kappa
value here.
metric_df
scDblFinder and cxds
The above analysis suggests that scrublet may be more
conservative than the other two methods, so including it in a consensus
may be increasing the number of false positives. Here, we’ll repeat the
same consensus analyses, but considering only
scDblFinder and cxds. Since there are only two
methods here, we will create two columns for this data:
# modify existing consensus columns to only consider 2 methods
# (scDblFinder and cxds; scrublet is excluded as potentially too conservative)
doublet_df_list <- doublet_df_list |>
purrr::map(
\(dataset_df) {
dataset_df <- dataset_df |>
# rowwise so all()/&& below evaluate per droplet (scalar) values
dplyr::rowwise() |>
# Update column `consensus_call`: "doublet" only if both methods agree
dplyr::mutate(
consensus_call = dplyr::if_else(
all(c(scdbl_prediction, cxds_prediction) == "doublet"),
"doublet",
"singlet"
)
) |>
# Update `consensus_class`: doublet/singlet on full agreement, else ambiguous
dplyr::mutate(
consensus_class = dplyr::case_when(
consensus_call == "doublet" ~ "doublet",
all(c(scdbl_prediction, cxds_prediction) == "singlet") ~ "singlet",
.default = "ambiguous"
)
) |>
# Update `confusion_call`: tp/tn/fp/fn vs ground truth, based on consensus_call
dplyr::mutate(
confusion_call = dplyr::case_when(
consensus_call == "doublet" && ground_truth == "doublet" ~ "tp",
consensus_call == "singlet" && ground_truth == "singlet" ~ "tn",
consensus_call == "doublet" && ground_truth == "singlet" ~ "fp",
consensus_call == "singlet" && ground_truth == "doublet" ~ "fn"
)
)
return(dataset_df)
}
) |>
purrr::set_names(dataset_names)
This section plots the PCA for each dataset, clockwise from the top left:
scDblFinder singlet/doublet callscxds singlet/doublet callspc_color_columns <- c("scDblFinder prediction" = "scdbl_prediction",
"cxds prediction" = "cxds_prediction",
"Ground truth" = "ground_truth",
"Consensus call" = "consensus_call")
# Repeat the PCA plot grids for the two-method (scDblFinder + cxds) consensus:
# one panel per entry in `pc_color_columns`, plus consensus-class and metrics panels
doublet_df_list |>
purrr::imap(
\(df, dataset) {
pc_plot_list <- pc_color_columns |>
purrr::imap(
\(color_column, plot_title) {
plot_pca_calls(
df,
# convert the string column name to a symbol for tidy evaluation
color_column = !!sym(color_column),
plot_type = "calls",
title = plot_title
# suppress per-panel legends; a shared legend is collected below
) + theme(legend.position = "none")}
)
# plot for consensus_class, which needs three colors
# note that the list name isn't actually used here
pc_plot_list$consensus_class <- plot_pca_calls(
df,
color_column = consensus_class,
plot_type = "class",
title = "Consensus class"
)
# plot metrics
pc_plot_list$metrics <- plot_pca_metrics(
df,
color_column = confusion_call
)
# assemble panels with a dataset-level title and a single collected legend
patchwork::wrap_plots(pc_plot_list) +
plot_annotation(
glue::glue("PCA for {dataset}"),
theme = theme(plot.title = element_text(size = 16))) +
plot_layout(guides = "collect")
}
)
$`hm-6k`
$`HMEC-orig-MULTI`
$`pbmc-1B-dm`
$`pdx-MULTI`
This section shows a table of the consensus class counts and calculates a confusion matrix with associated statistics from the consensus calls:
# Same summary as above, now computed for the two-method consensus:
# print consensus class counts and confusion matrix per dataset, then collect
# Kappa and balanced accuracy into metric_df
metric_df <- doublet_df_list |>
purrr::imap(
\(df, dataset) {
print(glue::glue("======================== {dataset} ========================"))
cat("Table of consensus class counts:")
print(table(df$consensus_class))
cat("\n\n")
confusion_result <- caret::confusionMatrix(
# truth should be first
table(
"Truth" = df$ground_truth,
"Consensus prediction" = df$consensus_call
),
positive = "doublet"
)
print(confusion_result)
# Extract information we want to present later in a table
tibble::tibble(
"Dataset name" = dataset,
"Kappa" = round(confusion_result$overall["Kappa"], 3),
"Balanced accuracy" = round(confusion_result$byClass["Balanced Accuracy"], 3)
)
}
) |>
dplyr::bind_rows()
======================== hm-6k ========================
Table of consensus class counts:
ambiguous doublet singlet
274 144 6388
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 144 27
singlet 0 6635
Accuracy : 0.996
95% CI : (0.9942, 0.9974)
No Information Rate : 0.9788
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.9123
Mcnemar's Test P-Value : 5.624e-07
Sensitivity : 1.00000
Specificity : 0.99595
Pos Pred Value : 0.84211
Neg Pred Value : 1.00000
Prevalence : 0.02116
Detection Rate : 0.02116
Detection Prevalence : 0.02512
Balanced Accuracy : 0.99797
'Positive' Class : doublet
======================== HMEC-orig-MULTI ========================
Table of consensus class counts:
ambiguous doublet singlet
2791 986 22649
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 776 2792
singlet 210 22648
Accuracy : 0.8864
95% CI : (0.8825, 0.8902)
No Information Rate : 0.9627
P-Value [Acc > NIR] : 1
Kappa : 0.2999
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.78702
Specificity : 0.89025
Pos Pred Value : 0.21749
Neg Pred Value : 0.99081
Prevalence : 0.03731
Detection Rate : 0.02937
Detection Prevalence : 0.13502
Balanced Accuracy : 0.83863
'Positive' Class : doublet
======================== pbmc-1B-dm ========================
Table of consensus class counts:
ambiguous doublet singlet
323 168 3299
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 64 66
singlet 104 3556
Accuracy : 0.9551
95% CI : (0.9481, 0.9615)
No Information Rate : 0.9557
P-Value [Acc > NIR] : 0.582698
Kappa : 0.4066
Mcnemar's Test P-Value : 0.004543
Sensitivity : 0.38095
Specificity : 0.98178
Pos Pred Value : 0.49231
Neg Pred Value : 0.97158
Prevalence : 0.04433
Detection Rate : 0.01689
Detection Prevalence : 0.03430
Balanced Accuracy : 0.68137
'Positive' Class : doublet
======================== pdx-MULTI ========================
Table of consensus class counts:
ambiguous doublet singlet
2272 676 7348
Confusion Matrix and Statistics
Consensus prediction
Truth doublet singlet
doublet 328 989
singlet 348 8631
Accuracy : 0.8701
95% CI : (0.8635, 0.8766)
No Information Rate : 0.9343
P-Value [Acc > NIR] : 1
Kappa : 0.2654
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.48521
Specificity : 0.89719
Pos Pred Value : 0.24905
Neg Pred Value : 0.96124
Prevalence : 0.06566
Detection Rate : 0.03186
Detection Prevalence : 0.12791
Balanced Accuracy : 0.69120
'Positive' Class : doublet
Considering only scDblFinder and cxds, we
see several particular differences in statistics:
hm-6k and
HMEC-orig-MULTI are about the same here as for the
consensus among all methods, but accuracy has substantially
decreased for pbmc-1B-dm and
pdx-MULTI.
Even so, only hm-6k (the “easy” dataset) has both a high
kappa and high balanced accuracy. While HMEC-orig-MULTI’s
balanced accuracy is fairly high, its kappa value is not.
metric_df
# record the versions of the packages used in this analysis and other environment information
sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices datasets utils methods base
other attached packages:
[1] UpSetR_1.4.0 caret_6.0-94
[3] lattice_0.22-6 patchwork_1.2.0
[5] ggplot2_3.5.1 SingleCellExperiment_1.26.0
[7] SummarizedExperiment_1.34.0 Biobase_2.64.0
[9] GenomicRanges_1.56.0 GenomeInfoDb_1.40.0
[11] IRanges_2.38.0 S4Vectors_0.42.0
[13] BiocGenerics_0.50.0 MatrixGenerics_1.16.0
[15] matrixStats_1.3.0
loaded via a namespace (and not attached):
[1] pROC_1.18.5 gridExtra_2.3 rlang_1.1.3
[4] magrittr_2.0.3 e1071_1.7-14 compiler_4.4.0
[7] DelayedMatrixStats_1.26.0 vctrs_0.6.5 reshape2_1.4.4
[10] stringr_1.5.1 pkgconfig_2.0.3 crayon_1.5.2
[13] XVector_0.44.0 labeling_0.4.3 scuttle_1.14.0
[16] utf8_1.2.4 prodlim_2023.08.28 tzdb_0.4.0
[19] UCSC.utils_1.0.0 bit_4.0.5 purrr_1.0.2
[22] xfun_0.43 beachmat_2.20.0 zlibbioc_1.50.0
[25] jsonlite_1.8.8 recipes_1.0.10 DelayedArray_0.30.1
[28] BiocParallel_1.38.0 parallel_4.4.0 R6_2.5.1
[31] stringi_1.8.4 parallelly_1.37.1 rpart_4.1.23
[34] lubridate_1.9.3 Rcpp_1.0.12 iterators_1.0.14
[37] knitr_1.46 future.apply_1.11.2 readr_2.1.5
[40] Matrix_1.7-0 splines_4.4.0 nnet_7.3-19
[43] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.16.0
[46] abind_1.4-5 yaml_2.3.8 timeDate_4032.109
[49] codetools_0.2-20 listenv_0.9.1 tibble_3.2.1
[52] plyr_1.8.9 withr_3.0.0 future_1.33.2
[55] survival_3.5-8 proxy_0.4-27 pillar_1.9.0
[58] BiocManager_1.30.23 renv_1.0.7 foreach_1.5.2
[61] generics_0.1.3 vroom_1.6.5 rprojroot_2.0.4
[64] hms_1.1.3 sparseMatrixStats_1.16.0 munsell_0.5.1
[67] scales_1.3.0 globals_0.16.3 class_7.3-22
[70] glue_1.7.0 tools_4.4.0 data.table_1.15.4
[73] ModelMetrics_1.2.2.2 gower_1.0.1 grid_4.4.0
[76] ipred_0.9-14 colorspace_2.1-0 nlme_3.1-164
[79] GenomeInfoDbData_1.2.12 cli_3.6.2 fansi_1.0.6
[82] S4Arrays_1.4.0 lava_1.8.0 dplyr_1.1.4
[85] gtable_0.3.5 digest_0.6.35 SparseArray_1.4.3
[88] farver_2.1.1 lifecycle_1.0.4 hardhat_1.3.1
[91] httr_1.4.7 bit64_4.0.5 MASS_7.3-60.2